#front matter
rm(list=ls())
options(scipen=12)
set.seed(613201912,kind="Mersenne-Twister")

#load libraries
library(akima) #for drawing image/surface plots
library(geoR) #for variogram analyses
library(scatterplot3d) #for drop line plot
library(lattice) #for density plots
library(rgdal)
library(car)
library(xtable)

#load California data
ca<-readRDS("CAslim.rds")
#saveRDS(ca,file="CAslim.rds",version=2)
#write.csv(ca,"CAslim.csv",row.names=F)

#eliminate out-of-bounds observations
ca<-ca[ca$Residence_Addresses_Longitude< -100,]

#predictor variables
names(ca)
table(ca$Vote_Frequency)
summary(ca$Voters_Age)
table(ca$Voters_Gender)
table(ca$CommercialData_EstimatedHHIncome)
ca$female<-recode(ca$Voters_Gender,'"F"=1;"M"=0;""=NA')
ca$income<-recode(ca$CommercialData_EstimatedHHIncome,
'"$1000-14999"=0;"$15000-24999"=1;"$25000-34999"=2;"$35000-49999"=3;"$50000-74999"=4;"$75000-99999"=5;"$100000-124999"=6;"$125000-149999"=7;"$150000-174999"=8;"$175000-199999"=9;"$200000-249999"=10;"$250000+"=11;""=NA')
table(ca$EthnicGroups_EthnicGroup1Desc)
ca$asian<-as.numeric(ca$EthnicGroups_EthnicGroup1Desc=="East and South Asian")#reference group is European, other, or unknown
ca$black<-as.numeric(ca$EthnicGroups_EthnicGroup1Desc=="Likely African-American")
ca$latino<-as.numeric(ca$EthnicGroups_EthnicGroup1Desc=="Hispanic and Portuguese")
table(ca$CommercialData_Education)
ca$college<-as.numeric(ca$CommercialData_Education=="Bach Degree - Extremely Likely" | ca$CommercialData_Education=="Bach Degree - Likely" | ca$CommercialData_Education=="Bach Degree - Likely" | ca$CommercialData_Education=="Grad Degree - Extremely Likely" | ca$CommercialData_Education=="Grad Degree - Likely")#reference group is less than high school, high school grad, some college, or unknown
ca<-subset(ca,subset=!is.na(Voters_Age)&!is.na(Vote_Frequency)&!is.na(female)&!is.na(income)&!is.na(asian)&!is.na(black)&!is.na(latino)&!is.na(college)&!is.na(Residence_Addresses_Longitude)&!is.na(Residence_Addresses_Latitude))
summary(ca)

#political variables
summary(ca$FECDonors_AvgDonation)
table(substr(ca$FECDonors_LastDonationDate,7,10))
summary(ca$FECDonors_NumberOfDonations)
summary(ca$FECDonors_TotalDonationsAmount)###
table(ca$FECDonors_AvgDonation_Range,ca$FECDonors_TotalDonationsAmt_Range)

#map of the data
pdf("caDonorMap.pdf")
plot(x=ca$Residence_Addresses_Longitude,y=ca$Residence_Addresses_Latitude,pch=20,cex=.1,xlab="Longitude",ylab="Latitude")
dev.off()
ca$log.donations<-log(ca$FECDonors_TotalDonationsAmount)

#code eastings and northings
obj <- cbind(ca$Residence_Addresses_Longitude, ca$Residence_Addresses_Latitude, ca$log.donations)
obj.2<-SpatialPoints(obj[,1:2],proj4string=CRS("+proj=longlat"))
obj.3<-spTransform(obj.2, CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
obj.4<-cbind(obj.3@coords,ca$log.donations)
obj.5<-as.geodata(obj.4, coords.col=1:2, data.col=3)
donations.geo<-jitterDupCoords(obj.5,max=.1)
ca$eastings<-donations.geo$coords[,1]
ca$northings<-donations.geo$coords[,2]

###CONTRIBUTORS BY LEFT AND RIGHT###
ca$right<-as.numeric(grepl("(Con)",ca$FECDonors_PrimaryRecipientOfContributions))
ca$left<-as.numeric(grepl("(Lib)",ca$FECDonors_PrimaryRecipientOfContributions))
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="WILLIAMSON MARIANNE"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="CA ARMENIAN AMER DEMS (CA)"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="NTL DEM TRAINING CMTE PAC"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="KENNEDY-SINEMA VICTORY FUND"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="DEM ALLIANCE FOR ACTION"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="DEMS OF NORTH ORANGE CNTY (CA)"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="DEMOC PAC (CA)"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="DEM HEADQUARTERS OF THE DESERT (DHQD) (CA)"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="DEM CAMP CNCL OF VENTURA CNTY - FED (CA)"]<-1
ca$left[ca$FECDonors_PrimaryRecipientOfContributions=="BLUE OH PAC"]<-1
ca$left[grepl("DEMS FOR ISRAEL",ca$FECDonors_PrimaryRecipientOfContributions)]<-1
ca$left[grepl("WELLSTONE",ca$FECDonors_PrimaryRecipientOfContributions)]<-1
ca$left[grepl("DEM CLUB",ca$FECDonors_PrimaryRecipientOfContributions)]<-1
ca$left[grepl("YOUNG DEMS",ca$FECDonors_PrimaryRecipientOfContributions)]<-1
ca$right[ca$FECDonors_PrimaryRecipientOfContributions=="MCMULLIN EVAN/MINDY FINN"]<-1
ca$right[ca$FECDonors_PrimaryRecipientOfContributions=="MAKE AMER GREAT"]<-1
ca$right[ca$FECDonors_PrimaryRecipientOfContributions=="MAGA COALITION INC"]<-1
ca$right[ca$FECDonors_PrimaryRecipientOfContributions=="TEA PTY FORWARD"]<-1
ca$right[ca$FECDonors_PrimaryRecipientOfContributions=="CMTE TO RESTORE AMER'S GREATNESS PAC"]<-1
ca$right[grepl("GOP",ca$FECDonors_PrimaryRecipientOfContributions)]<-1
table(ca$right)
table(ca$left)
ca$either<-ca$left+ca$right
#unique(ca$FECDonors_PrimaryRecipientOfContributions[ca$either==0])
table(ca$either)
ca<-subset(ca,subset=either==1)
dim(ca)

#define "geodata" objects for right & left wing
right.geo <- as.geodata(cbind(ca$eastings[ca$right==1], ca$northings[ca$right==1], ca$log.donations[ca$right==1]))
left.geo <- as.geodata(cbind(ca$eastings[ca$left==1], ca$northings[ca$left==1], ca$log.donations[ca$left==1]))

#create image plots with contours
int.right <- interp(x=ca$eastings[ca$right==1], y=ca$northings[ca$right==1], z=ca$log.donations[ca$right==1],duplicate='mean')#N=77566
#colFunc<-colorRampPalette(c("green","white","orange"))(9)
colFunc<-gray(9:0/(9))
pdf("rightContourCA.pdf")
image(int.right, xlim=range(ca$eastings), ylim=range(ca$northings),
	col=colFunc,xlab="Eastings",ylab="Northings",main="Right-Wing Donations")
contour(int.right, add=T)
dev.off()

int.left <- interp(x=ca$eastings[ca$left==1], y=ca$northings[ca$left==1], z=ca$log.donations[ca$left==1],duplicate='mean')#N=199626
#colFunc<-colorRampPalette(c("green","white","orange"))(9)
colFunc<-gray(9:0/(9))
pdf("leftContourCA.pdf")
image(int.left, xlim=range(ca$eastings), ylim=range(ca$northings),
	col=colFunc,xlab="Eastings",ylab="Northings",main="Left-Wing Donations")
contour(int.left, add=T)
dev.off()

#estimate simple linear models for each side
lm.right<-lm(log.donations~Vote_Frequency+Voters_Age+female+income+asian+black+latino+college,data=ca,subset=right==1); summary(lm.right)
lm.left<-lm(log.donations~Vote_Frequency+Voters_Age+female+income+asian+black+latino+college,data=ca,subset=left==1); summary(lm.left)

###FUNCTION TO RUN THE BOOTSTRAP MODELS###
bayes_model_function <- function(iterations = 1, sampleSize = 50, 
        dataset = sr.left, dv = sr.left$log.donations,
        form = ~Vote_Frequency+log(Voters_Age)+female+income+asian+black+latino+college+eastings+northings,
        x = sr.left$eastings, y = sr.left$northings, 
        low.tau = 1, high.tau = 3,
        low.phi = 50, high.phi = 1000) {
    
    # Output matrix for iterations
    if(form=="cte"){columns=4} else{
    	columns<-length(attr(terms(form),"term.labels"))+4
    	}
    all.bayes <- matrix(NA, nrow = iterations, ncol = columns)

    # Running the iterations
    for (i in 1:iterations) {
       	check<-0
      	while(check==0){
        	sample.indices <- sample(x = nrow(dataset), size = sampleSize)
        	bayes.rand <- dataset[sample.indices, ]
        	check<-min(apply(bayes.rand,2,var))
        }
        x.sub <- x[sample.indices]
        y.sub <- y[sample.indices]
        dv.sub <- dv[sample.indices]
		if(form=="cte"){
        bayes.iter <- krige.bayes(coords = cbind(x.sub, y.sub), data = dv.sub, 
            locations = NULL,
            model = model.control(trend.d = "cte",  
                cov.model = "exponential"), #best choice from Santa Rosa
            prior = prior.control(beta.prior = "flat", tausq.rel.prior = "uniform", 
                tausq.rel.discrete = seq(from = low.tau, to = high.tau, length = 100),
                phi.discrete = seq(low.phi, high.phi, length = 100)),
            output = output.control(messages = FALSE))			
			}else{
        bayes.iter <- krige.bayes(coords = cbind(x.sub, y.sub), data = dv.sub, 
            locations = NULL,
            model = model.control(trend.d = trend.spatial(form, bayes.rand),  
                cov.model = "exponential"), #best choice from Santa Rosa
            prior = prior.control(beta.prior = "flat", tausq.rel.prior = "uniform", 
                tausq.rel.discrete = seq(from = low.tau, to = high.tau, length = 100),
                phi.discrete = seq(low.phi, high.phi, length = 100)),
            output = output.control(messages = FALSE))
            }

        bayes.sub.results <- as.matrix(bayes.iter$posterior$sample)
        all.bayes[i, ] <- apply(bayes.sub.results,2,mean)
        	if(i==1){
        		colnames(all.bayes)<-names(apply(bayes.sub.results,2,mean))
        	}
        print(paste("Iteration ", i, " complete at ", Sys.time()))
    }

    # Calculate 50th, 5th, and 95th percentiles of combined matrix
    bayes.all.percentile <- t(apply(all.bayes, 2, quantile, c(0.5, 0.05, 0.95)))
	print("*** Bootstrap results: median, 5th, and 95th percentile (bayes.whole.percentile)", quote = FALSE)
    print(bayes.all.percentile)

    # Combine means of whole dataset with average.bayes.mat
    print("***Bayes mean matrix (average.bayes.mat)", quote = FALSE)
    average.bayes.mat <- matrix(apply(all.bayes, 2, mean), ncol = columns, nrow = 1)
    colnames(average.bayes.mat)<-colnames(all.bayes)
	print(average.bayes.mat)

    return(list(all.bayes = all.bayes, bayes.all.percentile = bayes.all.percentile, average.bayes.mat = average.bayes.mat))
}

#load the data
sr.right<-subset(ca,subset=right==1,select=c(log.donations,Vote_Frequency,Voters_Age,female,income,asian,black,latino,college,eastings,northings))
sr.left<-subset(ca,subset=left==1,select=c(log.donations,Vote_Frequency,Voters_Age,female,income,asian,black,latino,college,eastings,northings))

#run the models
left.universal<-bayes_model_function(iterations=50,sampleSize=1000)
left.ordinary<-bayes_model_function(iterations=50,sampleSize=1000,form="cte")
right.universal<-bayes_model_function(iterations=50,sampleSize=1000, 
        dataset = sr.right, dv = sr.right$log.donations,
        form = ~Vote_Frequency+log(Voters_Age)+female+income+asian+black+latino+college+eastings+northings,
        x = sr.right$eastings, y = sr.right$northings)
right.ordinary<-bayes_model_function(iterations=50,sampleSize=1000, 
        dataset = sr.right, dv = sr.right$log.donations,
        form = "cte", x = sr.right$eastings, y = sr.right$northings)

#output results
left.ordinary$bayes.all.percentile
row.names(left.ordinary$bayes.all.percentile)<-c("Intercept","sigma2","1/phi","tau2/sigma2")
colnames(left.ordinary$bayes.all.percentile)<-c("Median","5th Perc.","95th Perc.")
xtable(left.ordinary$bayes.all.percentile,digits=4)
length(sr.left$log.donations)

left.universal$bayes.all.percentile
row.names(left.universal$bayes.all.percentile)<-c("Intercept","Vote Frequency","Logged Age","Female","Income (12 Categories)","Asian","Black","Latino","College","Eastings","Northings","sigma2","1/phi","tau2/sigma2")
colnames(left.universal$bayes.all.percentile)<-c("Median","5th Perc.","95th Perc.")
xtable(left.universal$bayes.all.percentile,digits=4)

right.ordinary$bayes.all.percentile
row.names(right.ordinary$bayes.all.percentile)<-c("Intercept","sigma2","1/phi","tau2/sigma2")
colnames(right.ordinary$bayes.all.percentile)<-c("Median","5th Perc.","95th Perc.")
xtable(right.ordinary$bayes.all.percentile,digits=4)
length(sr.right$log.donations)

right.universal$bayes.all.percentile
row.names(right.universal$bayes.all.percentile)<-c("Intercept","Vote Frequency","Logged Age","Female","Income (12 Categories)","Asian","Black","Latino","College","Eastings","Northings","sigma2","1/phi","tau2/sigma2")
colnames(right.universal$bayes.all.percentile)<-c("Median","5th Perc.","95th Perc.")
xtable(right.universal$bayes.all.percentile,digits=4)

#Comparison graphs of variograms
#define semivariogram function
exponential.semivariogram<-function(d,nugget,partial.sill,decay){
	nugget+partial.sill*(1-exp(-decay*d))
}

#left wing
ruler<-seq(0,5000,length=500)
ord.lower<-exponential.semivariogram(d=ruler,nugget=1.2460*2.1322,partial.sill=1.2460,decay=1/259.9754)
ord.upper<-exponential.semivariogram(d=ruler,nugget=1.4988*2.3910,partial.sill=1.4988,decay=1/771.7889)
univ.lower<-exponential.semivariogram(d=ruler,nugget=1.1412*2.0886,partial.sill=1.1412,decay=1/548.2323)
univ.upper<-exponential.semivariogram(d=ruler,nugget=1.4041*2.4456,partial.sill=1.4041,decay=1/796.5815)
#pdf("srVariogramLeft.pdf")
plot(x=c(0,5000),y=c(0,5),xlab="Distance",ylab="Semivariance",type='n',main="Left-of-Center Variogram")
polygon(x=c(ruler,rev(ruler)), y=c(ord.lower,rev(ord.upper)),border=NA,col=rgb(0,0,.9,.3)) 
polygon(x=c(ruler,rev(ruler)), y=c(univ.lower,rev(univ.upper)),border=NA,col=rgb(.9,0,0,.3)) 
lines(x=ruler,y=exponential.semivariogram(d=ruler,nugget=1.3571*2.2294,partial.sill=1.3571,decay=1/630.4404),col='blue',lty=2,lwd=3)
lines(x=ruler,y=exponential.semivariogram(d=ruler,nugget=1.2923*2.2610,partial.sill=1.2923,decay=1/718.9295),col='red',lty=1,lwd=3)
legend(x=15,y=1.5,lty=c(2,1),col=c("blue","red"),legend=c("Ordinary","Universal"),lwd=3)
#dev.off()

#lines.variomodel(cov.model="exponential",cov.pars=left.ordinary$bayes.all.percentile[2:3,1],nug=I(left.ordinary$bayes.all.percentile[2,1]*left.ordinary$bayes.all.percentile[4,1]),col='blue',lty=2,max.dist=5000)
#lines.variomodel(cov.model="exponential",cov.pars=left.universal$bayes.all.percentile[12:13,1],nug=I(left.universal$bayes.all.percentile[12,1]*left.universal$bayes.all.percentile[14,1]),col='red',lty=1,max.dist=5000)

#right wing
ruler<-seq(0,5000,length=500)
ord.lower<-exponential.semivariogram(d=ruler,nugget=0.9120*2.1084,partial.sill=0.9120,decay=1/303.2921)
ord.upper<-exponential.semivariogram(d=ruler,nugget=1.1506*2.4285,partial.sill=1.1506,decay=1/781.7591)
univ.lower<-exponential.semivariogram(d=ruler,nugget=0.8679*2.1111,partial.sill=0.8679,decay=1/482.0768)
univ.upper<-exponential.semivariogram(d=ruler,nugget=1.1208*2.4169,partial.sill=1.1208,decay=1/800.4722)
#pdf("srVariogramRight.pdf")
plot(x=c(0,5000),y=c(0,5),xlab="Distance",ylab="Semivariance",type='n',main="Right-of-Center Variogram")
polygon(x=c(ruler,rev(ruler)), y=c(ord.lower,rev(ord.upper)),border=NA,col=rgb(0,0,.9,.3)) 
polygon(x=c(ruler,rev(ruler)), y=c(univ.lower,rev(univ.upper)),border=NA,col=rgb(.9,0,0,.3)) 
lines(x=ruler,y=exponential.semivariogram(d=ruler,nugget=1.0152*2.2367,partial.sill=1.0152,decay=1/596.2596),col='blue',lty=2,lwd=3)
lines(x=ruler,y=exponential.semivariogram(d=ruler,nugget=0.9621*2.2920,partial.sill=0.9621,decay=1/725.9394),col='red',lty=1,lwd=3)
legend(x=15,y=1.5,lty=c(2,1),col=c("blue","red"),legend=c("Ordinary","Universal"))
#dev.off()

